Stephens lab data accessed from http://www.gtexportal.org/static/datasets/gtex_analysis_pilot_v3/multi_tissue_eqtls/Multi_tissue_eQTL_GTEx_Pilot_Phase_datasets.tar on 20150722.
From README: “We are using the eQTL posterior probabilities from the UC Multi-tissue eQTL method (doi:10.1371/journal.pgen.1003486) for each of the 9 tissues analyzed in the pilot phase (Adipose_subcutaneous, Artery_Tibal, Whole_Blood, Heart_Left_Ventricle, Lung, Muscle_Skeletal, Nerve_Tibial, Skin_Lower_Leg_Sun_Exposed, Thyroid) in the file res_final_uc_com_genes_com_snps.txt.gz. These values may be interpreted as Pr(SNP is eQTL in tissue s | data). 9875 eGenes are presented, with the”top" (most significant) SNP in each gene used."
library(dplyr)
library(plyr)
library(tidyr)
library(ggplot2)
library(GGally)
"%&%" = function(a,b) paste(a,b,sep="")
mt.dir <- "~/GitHub/GenArch/GenArchPaper/UC_Stephens_Multitissue_Comp/"
#tarbell mt.dir <- "/Volumes/im-lab/nas40t2/hwheeler/cross-tissue/Multi_tissue_eQTL_GTEx_Pilot_Phase_datasets/"
h2.dir <- "~/GitHub/GenArch/GenArchPaper/BSLMM/bslmm_gtex_results/"
#tarbell h2.dir <- "/Volumes/im-lab/nas40t2/hwheeler/cross-tissue/"
bslmm.dir <- "~/GitHub/GenArch/GenArchPaper/BSLMM/bslmm_gtex_results/"
#tarbell bslmm.dir <- "/Volumes/im-lab/nas40t2/hwheeler/cross-tissue/BSLMM_exp/"
mt <- read.table(mt.dir %&% "res_final_uc_com_genes_com_snps.txt.gz",header=TRUE)
head(mt)
## gene snp Adipose Artery Blood Heart Lung Muscle
## 1 ENSG00000137288.4 rs11759971 0.6596 0.6547 0.5383 0.6264 0.6759 0.5699
## 2 ENSG00000168005.4 rs36077346 0.7835 0.7817 0.7632 0.7743 0.7928 0.5103
## 3 ENSG00000255893.1 rs657249 1.0000 1.0000 0.3647 1.0000 1.0000 1.0000
## 4 ENSG00000103227.12 rs2382946 0.9812 0.9985 0.9967 1.0000 0.9998 1.0000
## 5 ENSG00000188385.7 rs9733324 0.9939 0.9935 0.6668 0.9839 0.9949 0.9801
## 6 ENSG00000166257.4 rs1148088 0.4582 0.4515 0.3038 0.3608 0.5185 0.2841
## Nerve Skin Thyroid
## 1 0.6750 0.7638 0.6775
## 2 0.7922 0.7837 0.7932
## 3 1.0000 1.0000 1.0000
## 4 0.9988 0.9104 1.0000
## 5 0.9945 0.9926 0.9950
## 6 0.4813 0.4602 0.8391
dim(mt)
## [1] 9875 11
#remove version number in order to compare ensembl IDs
a <- substr(mt$gene,1,15)
mt <- mutate(mt,gene=a)
h2.ts <- read.table(bslmm.dir %&% "GTEx_Tissue-Specific_local_h2_se_geneinfo.txt",header=TRUE,sep="\t")
#remove version number in order to compare ensembl IDs
a <- substr(h2.ts$EnsemblGeneID,1,15)
h2.ts <- mutate(h2.ts,gene=a)
h2.tw <- read.table(bslmm.dir %&% "GTEx_Tissue-Wide_local_h2_se_geneinfo.txt",header=TRUE,sep="\t")
h2.tw <- mutate(h2.tw,gene=EnsemblGeneID)
#remove version number in order to compare ensembl IDs
a <- substr(h2.tw$EnsemblGeneID,1,15)
h2.tw <- mutate(h2.tw,gene=a)
data <- inner_join(mt,h2.ts,by='gene')
dim(data)
## [1] 4467 34
##only half overlap, why? gcta didn't converge? multi-tissue model didn't converge?
#test one tissue h2 at a time
tislist <- c('h2.AdiposeSubcutaneous','h2.ArteryTibial','h2.HeartLeftVentricle','h2.Lung','h2.MuscleSkeletal','h2.NerveTibial','h2.SkinSunExposedLowerleg','h2.Thyroid','h2.WholeBlood')
for(tis in tislist){
h2.tis <- h2.ts %>% select(gene,one_of(tis)) ##one_of allows character vector
newdata <- inner_join(mt,h2.tis,by='gene')
gdata <- gather(newdata,"tissue","Pr",3:11)
colnames(gdata)[3] <- "h2"
p <- ggplot(gdata,aes(x=h2,y=Pr))+facet_wrap(~tissue)+geom_point(alpha=0.4)+stat_smooth()+
coord_cartesian(ylim=c(-0.05,1.2))+xlab(tis)
cors <- ddply(gdata, .(tissue), summarise, cor = signif(cor(h2, Pr), 2))
p1 <- p + geom_text(data=cors, aes(label=paste("R = ", cor, sep="")), x=-Inf, y=Inf, hjust=-0.2, vjust=1.2, size=3)
pvals <- ddply(gdata, .(tissue), summarise, cor = signif(cor.test(h2, Pr)$p.value, 2))
p2 <- p1 + geom_text(data=pvals, aes(label=paste("p = ", cor, sep="")), x=Inf, y=Inf, hjust=1.2, vjust=1.2, size=3)
print(p2)
}
tis <- 'h2.CrossTissue'
h2.tis <- h2.ts %>% select(gene,one_of(tis)) ##one_of allows character vector
newdata <- inner_join(mt,h2.tis,by='gene')
gdata <- gather(newdata,"tissue","Pr",3:11)
colnames(gdata)[3] <- "h2"
p <- ggplot(gdata,aes(x=h2,y=Pr))+facet_wrap(~tissue)+geom_point(alpha=0.4)+stat_smooth()+
coord_cartesian(ylim=c(-0.05,1.2))+xlab(tis)
cors <- ddply(gdata, .(tissue), summarise, cor = signif(cor(h2, Pr), 2))
p1 <- p + geom_text(data=cors, aes(label=paste("R = ", cor, sep="")), x=-Inf, y=Inf, hjust=-0.2, vjust=1.2, size=3)
pvals <- ddply(gdata, .(tissue), summarise, cor = signif(cor.test(h2, Pr)$p.value, 2))
p2 <- p1 + geom_text(data=pvals, aes(label=paste("p = ", cor, sep="")), x=Inf, y=Inf, hjust=1.2, vjust=1.2, size=3)
print(p2)
#test one tissue h2 at a time
tislist <- c('h2.AdiposeSubcutaneous','h2.ArteryTibial','h2.HeartLeftVentricle','h2.Lung','h2.MuscleSkeletal','h2.NerveTibial','h2.SkinSunExposedLowerleg','h2.Thyroid','h2.WholeBlood')
for(tis in tislist){
h2.tis <- h2.tw %>% select(gene,one_of(tis)) ##one_of allows character vector
newdata <- inner_join(mt,h2.tis,by='gene')
gdata <- gather(newdata,"tissue","Pr",3:11)
colnames(gdata)[3] <- "h2"
p <- ggplot(gdata,aes(x=h2,y=Pr))+facet_wrap(~tissue)+geom_point(alpha=0.4)+stat_smooth()+
coord_cartesian(ylim=c(-0.05,1.2))+xlab(tis)
cors <- ddply(gdata, .(tissue), summarise, cor = signif(cor(h2, Pr), 2))
p1 <- p + geom_text(data=cors, aes(label=paste("R = ", cor, sep="")), x=-Inf, y=Inf, hjust=-0.2, vjust=1.2, size=3)
pvals <- ddply(gdata, .(tissue), summarise, cor = signif(cor.test(h2, Pr)$p.value, 2))
p2 <- p1 + geom_text(data=pvals, aes(label=paste("p = ", cor, sep="")), x=Inf, y=Inf, hjust=1.2, vjust=1.2, size=3)
print(p2)
}
meanmt <- apply(mt[,3:11],1,mean)
sdmt <- apply(mt[,3:11],1,sd)
cv <- sdmt/meanmt
newmt<-mutate(mt,CV=cv)
tislist <- c('h2.CrossTissue','h2.AdiposeSubcutaneous','h2.ArteryTibial','h2.HeartLeftVentricle','h2.Lung','h2.MuscleSkeletal','h2.NerveTibial','h2.SkinSunExposedLowerleg','h2.Thyroid','h2.WholeBlood')
for(tis in tislist){
h2.tis <- h2.ts %>% select(gene,one_of(tis)) ##one_of allows character vector
newdata <- inner_join(newmt,h2.tis,by='gene')
colnames(newdata)[13] <- "h2"
p <- ggplot(newdata,aes(x=h2,y=CV))+geom_point(alpha=0.4)+stat_smooth()+
xlab(tis)+ylab("Coef of variation (sd/mean) of Pr")
cors <- data.frame(cor=signif(cor(newdata$h2, newdata$CV), 2))
p1 <- p + geom_text(data=cors, aes(label=paste("R = ", cor, sep="")), x=-Inf, y=Inf, hjust=-0.2, vjust=1.2, size=4)
pvals <- data.frame(cor=signif(cor.test(newdata$h2, newdata$CV)$p.value, 2))
p2 <- p1 + geom_text(data=pvals, aes(label=paste("p = ", cor, sep="")), x=Inf, y=Inf, hjust=1.2, vjust=1.2, size=4)
print(p2)
}
meanmt <- apply(mt[,3:11],1,mean)
sdmt <- apply(mt[,3:11],1,sd)
cv <- sdmt/meanmt
newmt<-mutate(mt,CV=cv)
tislist <- c('h2.AdiposeSubcutaneous','h2.ArteryTibial','h2.HeartLeftVentricle','h2.Lung','h2.MuscleSkeletal','h2.NerveTibial','h2.SkinSunExposedLowerleg','h2.Thyroid','h2.WholeBlood')
for(tis in tislist){
h2.tis <- h2.tw %>% select(gene,one_of(tis)) ##one_of allows character vector
newdata <- inner_join(newmt,h2.tis,by='gene')
colnames(newdata)[13] <- "h2"
p <- ggplot(newdata,aes(x=h2,y=CV))+geom_point(alpha=0.4)+stat_smooth()+
xlab(tis)+ylab("Coef of variation (sd/mean) of Pr")
cors <- data.frame(cor=signif(cor(newdata$h2, newdata$CV), 2))
p1 <- p + geom_text(data=cors, aes(label=paste("R = ", cor, sep="")), x=-Inf, y=Inf, hjust=-0.2, vjust=1.2, size=4)
pvals <- data.frame(cor=signif(cor.test(newdata$h2, newdata$CV)$p.value, 2))
p2 <- p1 + geom_text(data=pvals, aes(label=paste("p = ", cor, sep="")), x=Inf, y=Inf, hjust=1.2, vjust=1.2, size=4)
print(p2)
}